* Merge different data files from MEPS

* ---------------------------------------------------------------
* Step 1: build the restricted person-level file that is merged
* onto the health variables further down.
* ---------------------------------------------------------------
* The path uses a forward slash: Stata accepts "/" as a directory
* separator on every platform, whereas "\" only works on Windows.
use "${data}/Consolidated_data_reshaped.dta", clear

* sample restrictions
* Keeps 24 <= AGE1 < 84. A missing AGE1 is also dropped here, because
* Stata treats missing values as larger than any number.
drop if AGE1<24 | AGE1>=84
keep if SEX==1 // keep males

* drop if no educ or race info
drop if educ_type==.
drop if RACE_sum==.

* drop observations without a usable (strictly positive) sample weight
* or without a panel identifier
drop if Sample_Weight==. | Sample_Weight<=0
drop if PANEL==.

* saved sorted by ID, the merge key used below
sort ID
save "${data}/MEPS_merging_all.dta", replace


*** MERGE WITH HEALTH VARIABLES
* Load the health file, replacing whatever is in memory.
use "${data}/Health_variables.dta", clear

* Add PANEL, shifted left by ten decimal digits, to ID.
* NOTE(review): presumably this reproduces the ID layout of
* MEPS_merging_all.dta so the two files can be matched - confirm.
format ID %16.0g
replace ID = ID + PANEL*10000000000

* PANEL is not on the keep list, so it is dropped here after being used above.
keep ID H_1 H_3 H_5 HEY1_dp HEY2_dp HEY1_du HEY2_du HEY1_s HEY2_s R_3 R_5 LONGWT

* Shorter names: H* and R* lose the underscore, HEY1_* become the
* un-suffixed du/dp/s and HEY2_* the "2"-suffixed versions.
rename (H_1 H_3 H_5 R_3 R_5) (H1 H3 H5 R3 R5)
rename (HEY1_du HEY1_dp HEY1_s) (du dp s)
rename (HEY2_du HEY2_dp HEY2_s) (du2 dp2 s2)

* Value labels for the three health measures
label define hlthlab 1 "H=Poor" 2 "H=Fair" 3 "H=Good"
label values H1 H3 H5 hlthlab

* Variable labels
label var H1 "Health"
label var du "Du"
label var dp "Dp"
label var s "S"

* Encode each (du, dp, s) combination as a single code 1-8:
*   code = 1 + 4*du + dp + 2*s
* which gives 1:(0,0,0) 2:(0,1,0) 3:(0,0,1) 4:(0,1,1)
*             5:(1,0,0) 6:(1,1,0) 7:(1,0,1) 8:(1,1,1)  in (du,dp,s) order.
* The inlist() guards leave the code missing unless all three
* indicators are exactly 0 or 1.
gen type2 = 1 + 4*du + dp + 2*s ///
    if inlist(du, 0, 1) & inlist(dp, 0, 1) & inlist(s, 0, 1)

* Same coding for the second set of indicators (du2, dp2, s2).
gen type2_2 = 1 + 4*du2 + dp2 + 2*s2 ///
    if inlist(du2, 0, 1) & inlist(dp2, 0, 1) & inlist(s2, 0, 1)

* One shared value label for both code variables
label define tp2 1 "du=0; dp=0; s=0" 2 "du=0; dp=1; s=0" 3 "du=0; dp=0; s=1" 4 "du=0; dp=1; s=1" 5 "du=1; dp=0; s=0" 6 "du=1; dp=1; s=0" 7 "du=1; dp=0; s=1" 8 "du=1; dp=1; s=1"
label values type2 type2_2 tp2


* Indicator for at least one shock:
*   1 if any of du/dp/s equals 1 (even when another one is missing),
*   0 if all three equal 0,
*   missing otherwise.
gen I_any_shock = cond(du==1 | dp==1 | s==1, 1, ///
                  cond(du==0 & dp==0 & s==0, 0, .))

* Same rule applied to du2/dp2/s2.
gen I_any_shock_2 = cond(du2==1 | dp2==1 | s2==1, 1, ///
                    cond(du2==0 & dp2==0 & s2==0, 0, .))

label define any 0 "No Health Shock" 1 "1+ Health Shocks"
foreach v of varlist I_any_shock I_any_shock_2 {
    label var `v' "Has Health Shock"
    label values `v' any
}

* Join the restricted person sample (using) onto the health records
* (master) by ID.
* merge 1:1 replaces the old-style "merge ID using ..." call: it stops
* with an error if ID is duplicated in either file instead of silently
* pairing duplicate rows in order, and it needs no prior sort on ID.
merge 1:1 ID using "${data}/MEPS_merging_all.dta"
tab _merge

bysort PANEL: tab _merge
* PANEL comes only from the using file here (it was not kept in the
* health data above), so master-only rows (_merge==1) show PANEL==. by
* construction: they are health records with no match in the restricted
* sample. Only matched observations are kept.
keep if _merge==3

label values educ_group edclab
label var educ_group "Education"

drop _merge


** income groups
* Income terciles of TOTALINC_Y1, computed separately within each
* educ_group x AGE_YR1 cell, for observations with RACE_sum==1,
* AGE_YR1<65 and INSCOP_Y1==1 only (missing for everyone else).
* Requires the egenmore package, which provides egen's xtile() function.
label define inc_t 1 "1st Income Tercile" 2 "2nd Income Tercile" 3 "3rd Income Tercile"

egen inc_tercile_Y1 = xtile(TOTALINC_Y1) ///
    if RACE_sum==1 & AGE_YR1<65 & INSCOP_Y1==1, ///
    by(educ_group AGE_YR1) nquantiles(3)

label values inc_tercile_Y1 inc_t
label var inc_tercile_Y1 "Income Tercile"

* construct income quintiles based on whites
* Weighted quintiles of TOTALINC_Y1 are estimated only on observations
* with RACE_sum==1, AGE1<65 and INSCOP_Y1==1; all other observations
* start out with a missing quintile.
xtile inc_quintile_Y1 = TOTALINC_Y1 [aw=Sample_Weight] if AGE1<65 & INSCOP_Y1==1 & RACE_sum==1, n(5)
label var inc_quintile_Y1 "Income Quintile"
label define inc_q 1 "1st" 2 "2nd" 3 "3rd" 4 "4th" 5 "5th"
label values inc_quintile_Y1 inc_q

* Upper income bound of quintiles 1-4. These must be computed BEFORE the
* replace statements below, while inc_quintile_Y1 is still filled only for
* the reference subsample. threshold1-threshold4 are also used by the
* year-2 quintile code that follows this section.
forvalues q = 1/4 {
    egen threshold`q' = max(TOTALINC_Y1) if inc_quintile_Y1==`q'
}
egen threshold = rowmax(threshold*)

tab threshold
bysort RACE_sum: tab threshold

* Assign RACE_sum 2 and 3 to quintiles using fixed cut-offs.
* NOTE(review): the numeric cut-offs are hard-coded - presumably copied
* from the "tab threshold" output above. They must be updated by hand if
* the sample changes, and they use a strict "<" whereas the year-2
* assignment uses "<=" against the computed thresholds - confirm intended.
replace inc_quintile_Y1 = 1 if TOTALINC_Y1 < 19.2174  & inlist(RACE_sum, 2, 3)
replace inc_quintile_Y1 = 2 if TOTALINC_Y1 < 34.27825 & inlist(RACE_sum, 2, 3) & inc_quintile_Y1==.
replace inc_quintile_Y1 = 3 if TOTALINC_Y1 < 50.17711 & inlist(RACE_sum, 2, 3) & inc_quintile_Y1==.
replace inc_quintile_Y1 = 4 if TOTALINC_Y1 < 75.531   & inlist(RACE_sum, 2, 3) & inc_quintile_Y1==.
* Stata treats missing values as larger than any number, so without the
* !missing() guard a missing income would satisfy ">= 75.531" and be
* placed in the top quintile.
replace inc_quintile_Y1 = 5 if TOTALINC_Y1 >= 75.531 & !missing(TOTALINC_Y1) & inlist(RACE_sum, 2, 3) & inc_quintile_Y1==.

table   RACE_sum educ_group if AGE1<65, stat(fvpercent inc_quintile_Y1)



* Spread each quintile's upper bound (threshold1-threshold4, which are
* non-missing only inside their own quintile) to every observation.
forvalues q = 1/4 {
    egen threshold`q'_new = max(threshold`q')
}

* Year-2 quintile: place TOTALINC_Y2 against the year-1 bounds.
* Quintiles are filled from the bottom up; each step only touches
* observations that are still unassigned.
gen inc_quintile_Y2 = 1 if TOTALINC_Y2 <= threshold1_new
forvalues q = 2/4 {
    replace inc_quintile_Y2 = `q' if TOTALINC_Y2 <= threshold`q'_new & inc_quintile_Y2==.
}
* The explicit TOTALINC_Y2!=. keeps a missing income (which compares as
* larger than any number) out of the top quintile.
replace inc_quintile_Y2 = 5 if TOTALINC_Y2 > threshold4_new & inc_quintile_Y2==. & TOTALINC_Y2!=.

label values inc_quintile_Y2 inc_q
label var inc_quintile_Y2 "Income Quintile"

* Remove all helper threshold variables
drop threshold1* threshold2* threshold3* threshold4* threshold


* Final check of the panel distribution, then save sorted by ID
tab PANEL
sort ID
save "${data}/MEPS_merged_Health_all_races.dta", replace

